Besides checking the results of DE analysis, also save the sets of DE genes out, to prepare for next step of gene set enrichment analysis.
library(ggplot2)
library(ggrepel)
library(ggpubr)
library(stringr)
library(MASS)
library(RColorBrewer)
library(DESeq2)
library(viridis)
library(ggpointdensity)
library(dplyr)
library(data.table)
library(ComplexHeatmap)
library(UpSetR)
library(readxl)
theme_set(theme_classic())
#path = "../../data/MorPhiC/June-2024/JAX_RNAseq_Neuroectoderm/"
path = "../../MorPhiC/data/June-2024/JAX_RNAseq_Neuroectoderm/"
dat_path = paste0(path, "processed/release110-gencode44/Tables")From searching online, my guess is the model_organ content inside the bracket should be
dorsal forebrain patterning
instead of
dorsal forebrain pattering
But leave it as it is for now.
meta = fread("data/June_2024/JAX_RNAseq_Neuroectoderm_meta_data.tsv",
data.table = FALSE)
dim(meta)## [1] 174 32
meta[1:2,]## biomaterial_id lib_biomaterial_id differentiated_biomaterial_id
## 1 MOK20002-WT GT23-05635 CBO-MOK20002-WT-Day7
## 2 MOK20002-WT GT23-05625 CBO-MOK20002-WT-Day6
## differentiated_biomaterial_description differentiation_protocol
## 1 KOLF2.2 derived cortical brain organoid JAXPD003
## 2 KOLF2.2 derived cortical brain organoid JAXPD003
## differentiated_timepoint_value differentiated_timepoint_unit
## 1 7 days
## 2 6 days
## differentiated_terminally_differentiated
## 1 No
## 2 No
## model_organ ko_strategy ko_gene
## 1 Cortical brain (dorsal forebrain pattering) WT WT
## 2 Cortical brain (dorsal forebrain pattering) WT WT
## library_preparation_protocol dissociation_protocol fragment_size input_amount
## 1 JAXPL001 N.A 410 300
## 2 JAXPL001 N.A 425 300
## input_unit final_yield final_yield_unit lib_concentration
## 1 ngs 249.6 ngs 51.8
## 2 ngs 170.0 ngs 33.8
## lib_concentration_unit PCR_cycle PCR_cycle_sample_index
## 1 nM 10 NA
## 2 nM 10 NA
## file_id sequencing_protocol
## 1 RFX_WT_Day7_GT23-05635_GCACGGAC-TGCGAGAC_S55_L002 JAXPS001
## 2 RFX_WT_Day6_GT23-05625_GACCTGAA-CTCACCAA_S97_L002 JAXPS001
## run_id biomaterial_description
## 1 20230424_23-scbct-028-run3 KOLF2.2
## 2 20230424_23-scbct-028-run3 KOLF2.2
## derived_cell_line_accession clone_id protocol_id zygosity type model_system
## 1 KOLF2.2J WT N.A N.A iPSC CBO
## 2 KOLF2.2J WT N.A N.A iPSC CBO
names(meta)## [1] "biomaterial_id"
## [2] "lib_biomaterial_id"
## [3] "differentiated_biomaterial_id"
## [4] "differentiated_biomaterial_description"
## [5] "differentiation_protocol"
## [6] "differentiated_timepoint_value"
## [7] "differentiated_timepoint_unit"
## [8] "differentiated_terminally_differentiated"
## [9] "model_organ"
## [10] "ko_strategy"
## [11] "ko_gene"
## [12] "library_preparation_protocol"
## [13] "dissociation_protocol"
## [14] "fragment_size"
## [15] "input_amount"
## [16] "input_unit"
## [17] "final_yield"
## [18] "final_yield_unit"
## [19] "lib_concentration"
## [20] "lib_concentration_unit"
## [21] "PCR_cycle"
## [22] "PCR_cycle_sample_index"
## [23] "file_id"
## [24] "sequencing_protocol"
## [25] "run_id"
## [26] "biomaterial_description"
## [27] "derived_cell_line_accession"
## [28] "clone_id"
## [29] "protocol_id"
## [30] "zygosity"
## [31] "type"
## [32] "model_system"
table(meta$model_organ, meta$ko_gene)##
## FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
## Cortical brain (dorsal forebrain pattering) 36 36 9 27 9 36 21
table(meta$run_id, meta$ko_gene)##
## FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
## 20230228_23-scbct-008 0 0 0 0 9 0 1
## 20230424_23-scbct-028-run3 0 0 0 0 0 27 3
## 20230508_23-scbct-039-run2 27 0 0 0 0 0 3
## 20230522_23-scbct-052 0 27 0 0 0 0 3
## 20230824_23-scbct-092 0 0 9 0 0 0 1
## 20240131_23-robson-020 0 0 0 27 0 0 3
## 20240307_24-robson-003 0 0 0 0 0 9 1
## 20240307_24-robson-004 0 9 0 0 0 0 3
## 20240307_24-robson-006 9 0 0 0 0 0 3
table(meta$ko_strategy, meta$ko_gene)##
## FEZF2 LHX5 LHX9 OTX1 PAX6 RFX4 WT
## CE 12 12 3 9 3 12 0
## KO 12 12 3 9 3 12 0
## PTC 12 12 3 9 3 12 0
## WT 0 0 0 0 0 0 21
For the row with column differentiated_biomaterial_id==“CBO-MOK20004-WT-Day3”, it should have value 3 instead of the original 4 in the column differentiated_timepoint_value.
print("Before correcting the error in timepoint:")## [1] "Before correcting the error in timepoint:"
print("Table between ko_gene and differentiated_timepoint_value:")## [1] "Table between ko_gene and differentiated_timepoint_value:"
table(meta$ko_gene, meta$differentiated_timepoint_value)##
## 3 4 5 6 7 8 9 14
## FEZF2 0 0 9 9 9 9 0 0
## LHX5 9 9 9 9 0 0 0 0
## LHX9 0 0 0 0 0 0 0 9
## OTX1 0 0 0 0 9 9 9 0
## PAX6 0 0 0 0 9 0 0 0
## RFX4 0 0 0 9 9 9 9 0
## WT 0 2 2 5 4 5 2 1
meta[which(meta$differentiated_biomaterial_id=="CBO-MOK20004-WT-Day3"),
"differentiated_timepoint_value"] = 3
print("After correcting the error in timepoint:")## [1] "After correcting the error in timepoint:"
print("Table between ko_gene and differentiated_timepoint_value:")## [1] "Table between ko_gene and differentiated_timepoint_value:"
table(meta$ko_gene, meta$differentiated_timepoint_value)##
## 3 4 5 6 7 8 9 14
## FEZF2 0 0 9 9 9 9 0 0
## LHX5 9 9 9 9 0 0 0 0
## LHX9 0 0 0 0 0 0 0 9
## OTX1 0 0 0 0 9 9 9 0
## PAX6 0 0 0 0 9 0 0 0
## RFX4 0 0 0 9 9 9 9 0
## WT 1 1 2 5 4 5 2 1
cts = fread(file.path(dat_path, "genesCounts.csv"), data.table = FALSE)
dim(cts)## [1] 38592 175
cts[1:2, c(1:2, (ncol(cts)-1):ncol(cts))]## Name RFX_CE_F5_Day8_GT23-05641_GTCGGAGC-TTATAACC_S73_L002
## 1 ENSG00000268674 0
## 2 ENSG00000271254 1521
## LHX5_CE_A1_Day3_GT23-07300_AAGTCCAA-TACTCATA_S163_L004
## 1 0
## 2 1270
## LHX5_KO_E4_Day5_GT23-07318_AACGTTCC-AGTACTCC_S166_L004
## 1 0
## 2 746
stopifnot(setequal(names(cts)[-1], meta$file_id))
meta = meta[match(names(cts)[-1], meta$file_id),]
table(names(cts)[-1] == meta$file_id)##
## TRUE
## 174
cts_dat = data.matrix(cts[,-1])
rownames(cts_dat) = cts$Namemodel_s = meta$model_system
table(model_s, useNA="ifany")## model_s
## CBO
## 174
table(meta$model_system, meta$model_organ, useNA="ifany")##
## Cortical brain (dorsal forebrain pattering)
## CBO 174
get_q75 <- function(x){quantile(x, probs = 0.75)}
# given that there is only one level for model_system
# the result from apply(cts_dat, 1, tapply, model_s, min) is no longer a matrix
# but a vector
# do not do transpose to the results from apply(cts_dat, 1, tapply, model_s, min)
gene_info = data.frame(Name = cts$Name,
apply(cts_dat, 1, tapply, model_s, min),
apply(cts_dat, 1, tapply, model_s, median),
apply(cts_dat, 1, tapply, model_s, get_q75),
apply(cts_dat, 1, tapply, model_s, max))
dim(gene_info)## [1] 38592 5
gene_info[1:2, ]## Name apply.cts_dat..1..tapply..model_s..min.
## ENSG00000268674 ENSG00000268674 0
## ENSG00000271254 ENSG00000271254 426
## apply.cts_dat..1..tapply..model_s..median.
## ENSG00000268674 0
## ENSG00000271254 1318
## apply.cts_dat..1..tapply..model_s..get_q75.
## ENSG00000268674 0.00
## ENSG00000271254 1701.75
## apply.cts_dat..1..tapply..model_s..max.
## ENSG00000268674 3
## ENSG00000271254 3117
table(row.names(gene_info)==gene_info$Name)##
## TRUE
## 38592
names(gene_info)[2:5] = paste0(rep(c("CBO_"), times=4),
rep(c("min", "median", "q75", "max"), each=1))
dim(gene_info)## [1] 38592 5
gene_info[1:2,]## Name CBO_min CBO_median CBO_q75 CBO_max
## ENSG00000268674 ENSG00000268674 0 0 0.00 3
## ENSG00000271254 ENSG00000271254 426 1318 1701.75 3117
summary(gene_info[,-1])## CBO_min CBO_median CBO_q75 CBO_max
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 2
## Median : 0.0 Median : 3.0 Median : 5.8 Median : 22
## Mean : 213.8 Mean : 699.1 Mean : 931.4 Mean : 1829
## 3rd Qu.: 89.0 3rd Qu.: 332.1 3rd Qu.: 474.6 3rd Qu.: 1003
## Max. :88969.0 Max. :489181.5 Max. :586205.0 Max. :1301990
table(gene_info$CBO_q75 > 0)##
## FALSE TRUE
## 14390 24202
w2kp = which(gene_info$CBO_q75 > 0)
cts_dat = cts_dat[w2kp,]
dim(cts_dat)## [1] 24202 174
gene_info = gene_info[w2kp,]gene_anno = fread("data/gencode_v44_primary_assembly_info.tsv")
dim(gene_anno)## [1] 62754 8
gene_anno[1:2,]## geneId chr strand start end ensembl_gene_id
## <char> <char> <char> <int> <int> <char>
## 1: ENSG00000000003.16 chrX - 100627108 100639991 ENSG00000000003
## 2: ENSG00000000005.6 chrX + 100584936 100599885 ENSG00000000005
## hgnc_symbol description
## <char> <char>
## 1: TSPAN6 tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858]
## 2: TNMD tenomodulin [Source:HGNC Symbol;Acc:HGNC:17757]
# all genes are contained in the annotation file
table(gene_info$Name %in% gene_anno$ensembl_gene_id)##
## TRUE
## 24202
setdiff(gene_info$Name, gene_anno$ensembl_gene_id)## character(0)
gene_info = merge(gene_anno, gene_info, by.x="ensembl_gene_id", by.y = "Name",
all.x = FALSE, all.y = TRUE)
dim(gene_info)## [1] 24202 12
gene_info[1:2,]## Key: <ensembl_gene_id>
## ensembl_gene_id geneId chr strand start end
## <char> <char> <char> <char> <int> <int>
## 1: ENSG00000000003 ENSG00000000003.16 chrX - 100627108 100639991
## 2: ENSG00000000005 ENSG00000000005.6 chrX + 100584936 100599885
## hgnc_symbol description CBO_min
## <char> <char> <int>
## 1: TSPAN6 tetraspanin 6 [Source:HGNC Symbol;Acc:HGNC:11858] 744
## 2: TNMD tenomodulin [Source:HGNC Symbol;Acc:HGNC:17757] 0
## CBO_median CBO_q75 CBO_max
## <num> <num> <int>
## 1: 2644.0 3235.00 5449
## 2: 9.5 20.75 97
gene_info[which(!gene_info$ensembl_gene_id%in%gene_anno$ensembl_gene_id)]## Key: <ensembl_gene_id>
## Empty data.table (0 rows and 12 cols): ensembl_gene_id,geneId,chr,strand,start,end...
gene_info[gene_info$CBO_min > 2e4, c("CBO_min", "hgnc_symbol")]## CBO_min hgnc_symbol
## <int> <char>
## 1: 22057 ACTB
## 2: 23713 HNRNPA1
## 3: 66180 EEF1A1
## 4: 23858 TUBB
## 5: 88639 MT-CO1
## 6: 29947 MT-ND4
## 7: 39231 MT-CO3
## 8: 88969 MALAT1
gene_info$gene_symbol = gene_info$hgnc_symbol
table(gene_info$gene_symbol == "")##
## FALSE TRUE
## 19133 5069
table(is.na(gene_info$gene_symbol))##
## FALSE
## 24202
wEns = which(gene_info$gene_symbol == "" | is.na(gene_info$gene_symbol))
gene_info$gene_symbol[wEns] = gene_info$ensembl_gene_id[wEns]
table(gene_info$gene_symbol == "")##
## FALSE
## 24202
table(is.na(gene_info$gene_symbol))##
## FALSE
## 24202
release_name = "2024_06_JAX_RNAseq_Neuroectoderm"
result_dir = file.path("results", release_name)
processed_output_dir = file.path(result_dir, "processed")
all_result_files = list.files(path=processed_output_dir, pattern=".tsv",
all.files=TRUE, full.names=FALSE)
all_result_files = sort(all_result_files)
extract_cores <- function(x){
x_split = str_split(x, "_")[[1]]
x_start = 6
x_end = length(x_split)-1
x_d_group = paste(x_split[x_start:(x_end-1)], collapse="_")
x_gene = x_split[x_end]
return(c(x_d_group, x_gene))
}
df_file_info = NULL
for (x in all_result_files){
df_file_info = rbind(df_file_info, extract_cores(x))
}
df_file_info = as.data.frame(df_file_info)
colnames(df_file_info) = c("DE_group", "gene")
df_file_info## DE_group gene
## 1 CBO_day_3_4 LHX5
## 2 CBO_day_5 FEZF2
## 3 CBO_day_5 LHX5
## 4 CBO_day_6 FEZF2
## 5 CBO_day_6 LHX5
## 6 CBO_day_6 RFX4
## 7 CBO_day_7 FEZF2
## 8 CBO_day_7 OTX1
## 9 CBO_day_7 PAX6
## 10 CBO_day_7 RFX4
## 11 CBO_day_8 FEZF2
## 12 CBO_day_8 OTX1
## 13 CBO_day_8 RFX4
## 14 CBO_day_9 OTX1
## 15 CBO_day_9 RFX4
For this dataset, there is only one model system, and for DE group (day) and each knockout gene, there are three knockout strategies.
Do it separately for each DE group (day).
df_file_info$items = paste(df_file_info$DE_group,
df_file_info$gene, sep="_")
df_file_info$items## [1] "CBO_day_3_4_LHX5" "CBO_day_5_FEZF2" "CBO_day_5_LHX5" "CBO_day_6_FEZF2"
## [5] "CBO_day_6_LHX5" "CBO_day_6_RFX4" "CBO_day_7_FEZF2" "CBO_day_7_OTX1"
## [9] "CBO_day_7_PAX6" "CBO_day_7_RFX4" "CBO_day_8_FEZF2" "CBO_day_8_OTX1"
## [13] "CBO_day_8_RFX4" "CBO_day_9_OTX1" "CBO_day_9_RFX4"
fc0 = log2(1.5)
p0 = 0.05
gene_symbols = df_file_info$gene
gene_symbols## [1] "LHX5" "FEZF2" "LHX5" "FEZF2" "LHX5" "RFX4" "FEZF2" "OTX1" "PAX6"
## [10] "RFX4" "FEZF2" "OTX1" "RFX4" "OTX1" "RFX4"
table(gene_symbols %in% gene_info$gene_symbol)##
## TRUE
## 15
genes_w_multi_ensembl = names(which(table(gene_info$gene_symbol)>1))
genes_w_multi_ensembl## [1] "LINC01238"
sorted_de_grps = sort(unique(df_file_info$DE_group))
for (d_group in sorted_de_grps){
df_file_info_dg = df_file_info[which(df_file_info$DE_group==d_group), ]
df = data.frame(KO = df_file_info_dg$items,
gene_symbol = df_file_info_dg$gene,
CE_nDE = NA, KO_nDE = NA, PTC_nDE = NA,
CE_padj = NA, CE_log2FoldChange = NA,
KO_padj = NA, KO_log2FoldChange = NA,
PTC_padj = NA, PTC_log2FoldChange = NA)
df$Model = str_extract(df$KO, "^[a-zA-Z0-9]+")
gene_ids = gene_info$ensembl_gene_id[match(df_file_info_dg$gene, gene_info$gene_symbol)]
for (k in 1:nrow(df_file_info_dg)){
res = fread(file.path(processed_output_dir,
paste0(release_name, "_", df_file_info_dg$items[k], "_DEseq2.tsv")),
data.table = FALSE)
print(res[1:2,1:15])
gene_id = gene_ids[k]
w_gene = which(res$gene_ID == gene_id)
stopifnot(length(w_gene) == 1)
stopifnot(!(df$gene_symbol[k]%in%genes_w_multi_ensembl))
for(ko1 in c("CE", "KO", "PTC")){
fc = paste0(df$gene_symbol[k], "_", ko1, "_log2FoldChange")
padj = paste0(df$gene_symbol[k], "_", ko1, "_padj")
col_name = paste0(ko1, "_nDE")
df[k,col_name] = sum(abs(res[[fc]]) > fc0 & res[[padj]] < p0, na.rm = TRUE)
col_name = paste0(ko1, "_log2FoldChange")
df[k,col_name] = res[[fc]][w_gene]
col_name = paste0(ko1, "_padj")
df[k,col_name] = res[[padj]][w_gene]
}
}
print(df)
p1 <- ggplot(df, aes(x=(PTC_nDE), y=(CE_nDE),
label=gene_symbol)) +
geom_point() + scale_shape_manual(values = c(7, 16)) +
geom_text_repel() + ggtitle("# of DE genes") +
labs(x="PTC", y="CE") +
geom_abline(intercept = 0, slope = 1, color = "orange", linetype = "dashed")
p2 <- ggplot(df, aes(x=(PTC_nDE), y=(KO_nDE),
label=gene_symbol)) +
geom_point() + scale_shape_manual(values = c(7, 16)) +
geom_text_repel() + ggtitle("# of DE genes") +
labs(x="PTC", y="KO") +
geom_abline(intercept = 0, slope = 1, color = "orange", linetype = "dashed")
p3 <- ggplot(df, aes(x=CE_log2FoldChange, y=-log10(CE_padj),
label=gene_symbol)) +
geom_point(size=2) + scale_shape_manual(values = c(7, 16)) +
geom_text_repel() + ggtitle("DE results of the KO genes, CE") +
labs(x="log2 fold change", y="-log10(adjusted p-value)") +
geom_hline(yintercept = 2, color = "grey", linetype = "dashed") +
geom_vline(xintercept = c(log2(1/1.5), log2(1.5)), color = "grey",
linetype = "dashed")
p4 <- ggplot(df, aes(x=KO_log2FoldChange, y=-log10(KO_padj),
label=gene_symbol)) +
geom_point(size=2) + scale_shape_manual(values = c(7, 16)) +
geom_text_repel() + ggtitle("DE results of the KO genes, KO") +
labs(x="log2 fold change", y="-log10(adjusted p-value)") +
geom_hline(yintercept = 2, color = "grey", linetype = "dashed") +
geom_vline(xintercept = c(log2(1/1.5), log2(1.5)), color = "grey",
linetype = "dashed")
p5 <- ggplot(df, aes(x=PTC_log2FoldChange, y=-log10(PTC_padj),
label=gene_symbol)) +
geom_point(size=2) + scale_shape_manual(values = c(7, 16)) +
geom_text_repel() + ggtitle("DE results of the KO genes, PTC") +
labs(x="log2 fold change", y="-log10(adjusted p-value)") +
geom_hline(yintercept = 2, color = "grey", linetype = "dashed") +
geom_vline(xintercept = c(log2(1/1.5), log2(1.5)), color = "grey",
linetype = "dashed")
g_combined <- ggarrange(p1, p2, p3, p4, p5, ncol = 2, nrow = 3)
print(annotate_figure(g_combined,
top = text_grob(d_group,
face = "bold", size = 16)))
}## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000276345 KI270721.1 + 2585 11802
## LHX5_CE_log2FoldChange LHX5_CE_pvalue LHX5_CE_padj LHX5_KO_log2FoldChange
## 1 0.0376 0.707 0.957 0.025
## 2 1.4400 0.628 NA 0.106
## LHX5_KO_pvalue LHX5_KO_padj LHX5_PTC_log2FoldChange LHX5_PTC_pvalue
## 1 0.802 1 0.0541 0.588
## 2 0.972 NA -0.3200 0.915
## LHX5_PTC_padj
## 1 0.974
## 2 0.996
## KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_3_4_LHX5 LHX5 10 7 11 0.928 0.455
## KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1 1 -1.12 0.855 -2.07 CBO
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000276345 KI270721.1 + 2585 11802
## FEZF2_CE_log2FoldChange FEZF2_CE_pvalue FEZF2_CE_padj FEZF2_KO_log2FoldChange
## 1 0.178 0.127 0.393 0.167
## 2 -1.600 0.597 NA -1.490
## FEZF2_KO_pvalue FEZF2_KO_padj FEZF2_PTC_log2FoldChange FEZF2_PTC_pvalue
## 1 0.151 0.654 0.149 0.199
## 2 0.619 NA 0.558 0.846
## FEZF2_PTC_padj
## 1 0.585
## 2 NA
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000276345 KI270721.1 + 2585 11802
## LHX5_CE_log2FoldChange LHX5_CE_pvalue LHX5_CE_padj LHX5_KO_log2FoldChange
## 1 0.216 0.0788 0.698 0.225
## 2 2.600 0.4470 NA 3.900
## LHX5_KO_pvalue LHX5_KO_padj LHX5_PTC_log2FoldChange LHX5_PTC_pvalue
## 1 0.0673 0.276 0.210 0.0888
## 2 0.2470 NA -0.257 0.9420
## LHX5_PTC_padj
## 1 1
## 2 NA
## KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_5_FEZF2 FEZF2 182 9 22 NA -4.640
## 2 CBO_day_5_LHX5 LHX5 2 172 1 0.96 0.101
## KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1 NA -5.6900 NA -1.05 CBO
## 2 0.978 -0.0183 0.0949 -1.06 CBO
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000276345 KI270721.1 + 2585 11802
## FEZF2_CE_log2FoldChange FEZF2_CE_pvalue FEZF2_CE_padj FEZF2_KO_log2FoldChange
## 1 0.209 0.283 0.682 0.11
## 2 -3.020 0.453 NA -17.60
## FEZF2_KO_pvalue FEZF2_KO_padj FEZF2_PTC_log2FoldChange FEZF2_PTC_pvalue
## 1 5.71e-01 1 0.188 0.334
## 2 1.56e-05 NA -0.995 0.800
## FEZF2_PTC_padj
## 1 1
## 2 NA
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000276345 KI270721.1 + 2585 11802
## LHX5_CE_log2FoldChange LHX5_CE_pvalue LHX5_CE_padj LHX5_KO_log2FoldChange
## 1 0.244 0.0915 0.902 0.0231
## 2 6.470 0.0439 NA 7.7500
## LHX5_KO_pvalue LHX5_KO_padj LHX5_PTC_log2FoldChange LHX5_PTC_pvalue
## 1 0.8890 1 0.00553 0.9700
## 2 0.0312 NA 7.28000 0.0225
## LHX5_PTC_padj
## 1 1
## 2 NA
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000276345 KI270721.1 + 2585 11802
## RFX4_CE_log2FoldChange RFX4_CE_pvalue RFX4_CE_padj RFX4_KO_log2FoldChange
## 1 -0.127 0.522 1 -0.0382
## 2 6.280 0.157 NA -12.7000
## RFX4_KO_pvalue RFX4_KO_padj RFX4_PTC_log2FoldChange RFX4_PTC_pvalue
## 1 0.84500 0.977 -0.127 0.519
## 2 0.00486 NA 5.210 0.240
## RFX4_PTC_padj
## 1 1
## 2 NA
## KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_6_FEZF2 FEZF2 37 6 9 NA -2.120
## 2 CBO_day_6_LHX5 LHX5 72 1 3 0.551 -1.290
## 3 CBO_day_6_RFX4 RFX4 6 12 10 1.000 -0.625
## KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1 0.0023 -6.270 1 -0.0381 CBO
## 2 1.0000 -0.423 1 -0.3610 CBO
## 3 0.9570 0.569 1 -0.8490 CBO
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000277196 KI270734.1 - 138082 161852
## FEZF2_CE_log2FoldChange FEZF2_CE_pvalue FEZF2_CE_padj FEZF2_KO_log2FoldChange
## 1 -0.1590 0.372 0.686 -0.0756
## 2 -0.0632 0.822 0.935 0.1980
## FEZF2_KO_pvalue FEZF2_KO_padj FEZF2_PTC_log2FoldChange FEZF2_PTC_pvalue
## 1 0.672 1 -0.0741 0.678
## 2 0.482 1 0.3160 0.261
## FEZF2_PTC_padj
## 1 1
## 2 1
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000277196 KI270734.1 - 138082 161852
## OTX1_CE_log2FoldChange OTX1_CE_pvalue OTX1_CE_padj OTX1_KO_log2FoldChange
## 1 0.434 0.0177 1 0.2230
## 2 -0.321 0.2610 1 -0.0552
## OTX1_KO_pvalue OTX1_KO_padj OTX1_PTC_log2FoldChange OTX1_PTC_pvalue
## 1 0.224 1 0.0815 0.658
## 2 0.846 1 -0.1100 0.698
## OTX1_PTC_padj
## 1 1
## 2 1
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000277196 KI270734.1 - 138082 161852
## PAX6_CE_log2FoldChange PAX6_CE_pvalue PAX6_CE_padj PAX6_KO_log2FoldChange
## 1 0.0158 0.929 1 0.0621
## 2 0.0929 0.741 1 -0.0896
## PAX6_KO_pvalue PAX6_KO_padj PAX6_PTC_log2FoldChange PAX6_PTC_pvalue
## 1 0.726 1 -0.121 0.493
## 2 0.750 1 0.290 0.302
## PAX6_PTC_padj
## 1 1
## 2 1
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000277196 KI270734.1 - 138082 161852
## RFX4_CE_log2FoldChange RFX4_CE_pvalue RFX4_CE_padj RFX4_KO_log2FoldChange
## 1 -0.1020 0.570 1 -0.000999
## 2 -0.0515 0.855 1 -0.022300
## RFX4_KO_pvalue RFX4_KO_padj RFX4_PTC_log2FoldChange RFX4_PTC_pvalue
## 1 0.996 1 -0.115 0.521
## 2 0.937 1 0.114 0.685
## RFX4_PTC_padj
## 1 1
## 2 1
## KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_7_FEZF2 FEZF2 1 3 0 NA -0.296
## 2 CBO_day_7_OTX1 OTX1 2 1 4 1 0.606
## 3 CBO_day_7_PAX6 PAX6 0 0 0 1 -0.567
## 4 CBO_day_7_RFX4 RFX4 0 0 0 1 -1.290
## KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1 2.63e-06 -4.010 1 1.6100 CBO
## 2 1.00e+00 0.468 1 -0.0452 CBO
## 3 1.00e+00 1.440 1 -1.2700 CBO
## 4 1.00e+00 -0.839 1 -0.9720 CBO
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_text_repel()`).
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000277196 KI270734.1 - 138082 161852
## FEZF2_CE_log2FoldChange FEZF2_CE_pvalue FEZF2_CE_padj FEZF2_KO_log2FoldChange
## 1 -0.0599 0.5730 1.000 -0.00493
## 2 0.3580 0.0526 0.996 0.27000
## FEZF2_KO_pvalue FEZF2_KO_padj FEZF2_PTC_log2FoldChange FEZF2_PTC_pvalue
## 1 0.967 1 -0.0192 0.864
## 2 0.193 1 0.1760 0.369
## FEZF2_PTC_padj
## 1 1
## 2 1
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000277196 KI270734.1 - 138082 161852
## OTX1_CE_log2FoldChange OTX1_CE_pvalue OTX1_CE_padj OTX1_KO_log2FoldChange
## 1 0.239 0.236 0.921 0.0075
## 2 -0.551 0.122 0.833 0.0124
## OTX1_KO_pvalue OTX1_KO_padj OTX1_PTC_log2FoldChange OTX1_PTC_pvalue
## 1 0.963 1 -0.183 0.250
## 2 0.966 1 -0.163 0.567
## OTX1_PTC_padj
## 1 1
## 2 1
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000277196 KI270734.1 - 138082 161852
## RFX4_CE_log2FoldChange RFX4_CE_pvalue RFX4_CE_padj RFX4_KO_log2FoldChange
## 1 0.138 0.3800 1 0.281
## 2 -0.487 0.0667 1 -0.399
## RFX4_KO_pvalue RFX4_KO_padj RFX4_PTC_log2FoldChange RFX4_PTC_pvalue
## 1 0.0724 1 0.038 0.8080
## 2 0.1300 1 -0.453 0.0852
## RFX4_PTC_padj
## 1 1
## 2 1
## KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_8_FEZF2 FEZF2 0 5 2 1.000 -0.341
## 2 CBO_day_8_OTX1 OTX1 10 37 13 0.283 1.280
## 3 CBO_day_8_RFX4 RFX4 8 12 11 1.000 -1.060
## KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1 1.75e-25 -3.710 0.0916 0.807 CBO
## 2 6.74e-01 0.714 1.0000 0.327 CBO
## 3 1.00e+00 -0.431 1.0000 -1.370 CBO
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000277196 KI270734.1 - 138082 161852
## OTX1_CE_log2FoldChange OTX1_CE_pvalue OTX1_CE_padj OTX1_KO_log2FoldChange
## 1 0.171 0.285 1 0.106
## 2 -0.201 0.470 1 0.236
## OTX1_KO_pvalue OTX1_KO_padj OTX1_PTC_log2FoldChange OTX1_PTC_pvalue
## 1 0.533 1 0.0774 0.630
## 2 0.420 1 -0.1980 0.478
## OTX1_PTC_padj
## 1 1
## 2 1
## gene_ID hgnc_symbol chr strand start end
## 1 ENSG00000271254 KI270711.1 - 4612 29626
## 2 ENSG00000277196 KI270734.1 - 138082 161852
## RFX4_CE_log2FoldChange RFX4_CE_pvalue RFX4_CE_padj RFX4_KO_log2FoldChange
## 1 0.2880 0.0774 1 -0.0742
## 2 -0.0357 0.8770 1 0.3340
## RFX4_KO_pvalue RFX4_KO_padj RFX4_PTC_log2FoldChange RFX4_PTC_pvalue
## 1 0.649 1 0.212 0.200
## 2 0.144 1 0.242 0.303
## RFX4_PTC_padj
## 1 1
## 2 1
## KO gene_symbol CE_nDE KO_nDE PTC_nDE CE_padj CE_log2FoldChange
## 1 CBO_day_9_OTX1 OTX1 37 40 44 1 0.613
## 2 CBO_day_9_RFX4 RFX4 37 56 33 1 -1.160
## KO_padj KO_log2FoldChange PTC_padj PTC_log2FoldChange Model
## 1 1.000 0.447 1 0.383 CBO
## 2 0.269 -2.670 1 -1.230 CBO
Also output the DE gene sets for the gene set enrichment analysis.
In some (day group, knockout gene, knockout strategy) combinations, there are too many DE genes and they cannot all be labeled in the volcano plots.
df_file_info## DE_group gene items
## 1 CBO_day_3_4 LHX5 CBO_day_3_4_LHX5
## 2 CBO_day_5 FEZF2 CBO_day_5_FEZF2
## 3 CBO_day_5 LHX5 CBO_day_5_LHX5
## 4 CBO_day_6 FEZF2 CBO_day_6_FEZF2
## 5 CBO_day_6 LHX5 CBO_day_6_LHX5
## 6 CBO_day_6 RFX4 CBO_day_6_RFX4
## 7 CBO_day_7 FEZF2 CBO_day_7_FEZF2
## 8 CBO_day_7 OTX1 CBO_day_7_OTX1
## 9 CBO_day_7 PAX6 CBO_day_7_PAX6
## 10 CBO_day_7 RFX4 CBO_day_7_RFX4
## 11 CBO_day_8 FEZF2 CBO_day_8_FEZF2
## 12 CBO_day_8 OTX1 CBO_day_8_OTX1
## 13 CBO_day_8 RFX4 CBO_day_8_RFX4
## 14 CBO_day_9 OTX1 CBO_day_9_OTX1
## 15 CBO_day_9 RFX4 CBO_day_9_RFX4
fc0## [1] 0.5849625
p0## [1] 0.05
for(k in 1:nrow(df_file_info)){
it_k = df_file_info$items[k]
d_group = df_file_info$DE_group[k]
gene_name = df_file_info$gene[k]
# create a list to store DE gene set for the gene set enrichment analysis
de_gene_sets = list()
print(it_k)
res = fread(file.path(processed_output_dir,
paste0(release_name, "_", it_k, "_DEseq2.tsv")),
data.table = FALSE)
for (ko1 in c("CE", "KO", "PTC")){
res_k = res[,c(1, grep(ko1, names(res)))]
names(res_k) = gsub(paste0(gene_name, "_", ko1, "_"), "", names(res_k))
ww_up = which((res_k$log2FoldChange > fc0) & (res_k$padj < p0))
ww_down = which((res_k$log2FoldChange < ((-1)*fc0)) & (res_k$padj < p0))
de_gene_sets[[paste0(gene_name, "_", ko1, "_up")]] = res_k[ww_up,]$gene_ID
de_gene_sets[[paste0(gene_name, "_", ko1, "_down")]] = res_k[ww_down,]$gene_ID
res_k$DE = "No"
res_k$DE[ww_up] <- "Up"
res_k$DE[ww_down] <- "Down"
res_k$log2FoldChange[which(res_k$log2FoldChange > 3)] = 3
res_k$log2FoldChange[which(res_k$log2FoldChange < -3)] = -3
col2use = c("blue", "grey", "red")
names(col2use) = c("Down", "No", "Up")
res_k$delabel = NA
res_k$gene_symbol = gene_info$gene_symbol[match(res_k$gene_ID,
gene_info$ensembl_gene_id)]
w_de = which(res_k$DE != "No")
res_k$delabel[w_de] = res_k$gene_symbol[w_de]
print(table(res_k$DE))
g2 = ggplot(res_k, aes(x=log2FoldChange, y=-log10(padj),
col=DE, label=delabel)) +
geom_point() + ggtitle(paste0(d_group, "\n", gene_name, "_", ko1)) +
geom_text_repel(point.padding = 0.5,
min.segment.length = 0,
size = 3, # Adjust text size
box.padding = 0.5,
max.overlaps = 20,
show.legend = FALSE) +
scale_color_manual(values=col2use)
print(g2)
}
# save out the DE gene lists for gene set enrichment analysis
DE_set_output_dir = sprintf("%s/DE_gene_lists", result_dir)
if (!file.exists(DE_set_output_dir)){
dir.create(DE_set_output_dir)
}
saveRDS(de_gene_sets,
sprintf("%s/%s_%s_DE_gene_list.rds",
DE_set_output_dir, release_name, it_k))
}## [1] "CBO_day_3_4_LHX5"
##
## Down No Up
## 6 23232 4
## Warning: Removed 2138 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23232 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No
## 7 23235
## Warning: Removed 1891 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23235 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 10 23231 1
## Warning: Removed 2332 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23231 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_5_FEZF2"
##
## Down No Up
## 54 23385 128
## Warning: Removed 10510 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23385 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 163 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
##
## Down No Up
## 2 23558 7
## Warning: Removed 14164 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23558 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 4 23545 18
## Warning: Removed 12338 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23545 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 6 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "CBO_day_5_LHX5"
##
## Down No
## 2 23565
## Warning: Removed 5037 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23565 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 13 23395 159
## Warning: Removed 12793 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23395 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 157 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
##
## Down No
## 1 23566
## Warning: Removed 825 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23566 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_6_FEZF2"
##
## Down No Up
## 15 23874 22
## Warning: Removed 9739 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23874 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 12 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
##
## Down No Up
## 1 23905 5
## Warning: Removed 2004 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23905 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 1 23902 8
## Warning: Removed 1924 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23902 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_6_LHX5"
##
## Down No Up
## 16 23839 56
## Warning: Removed 1732 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23839 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 42 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
##
## No Up
## 23910 1
## Warning: Removed 1658 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23910 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## No Up
## 23908 3
## Warning: Removed 1694 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23908 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_6_RFX4"
##
## Down No Up
## 1 23905 5
## Warning: Removed 1998 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23905 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 2 23899 10
## Warning: Removed 1971 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23899 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## No Up
## 23901 10
## Warning: Removed 1899 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23901 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_7_FEZF2"
##
## Down No
## 1 23664
## Warning: Removed 14223 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23664 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 2 23662 1
## Warning: Removed 1712 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23662 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## No
## 23665
## Warning: Removed 1738 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_7_OTX1"
##
## No Up
## 23663 2
## Warning: Removed 1669 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23663 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## No Up
## 23664 1
## Warning: Removed 1601 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23664 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 1 23661 3
## Warning: Removed 1746 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23661 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_7_PAX6"
##
## No
## 23665
## Warning: Removed 1971 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## No
## 23665
## Warning: Removed 1964 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## No
## 23665
## Warning: Removed 1894 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_7_RFX4"
##
## No
## 23665
## Warning: Removed 1872 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## No
## 23665
## Warning: Removed 1924 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## No
## 23665
## Warning: Removed 1900 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 23665 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_8_FEZF2"
##
## No
## 23268
## Warning: Removed 2112 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23268 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 2 23263 3
## Warning: Removed 2236 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23263 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 1 23266 1
## Warning: Removed 2213 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23266 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_8_OTX1"
##
## Down No Up
## 3 23258 7
## Warning: Removed 2052 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23258 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 9 23231 28
## Warning: Removed 2292 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23231 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
##
## Down No Up
## 5 23255 8
## Warning: Removed 2349 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23255 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_8_RFX4"
##
## Down No Up
## 2 23260 6
## Warning: Removed 2216 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23260 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 3 23256 9
## Warning: Removed 2241 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23256 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
##
## Down No Up
## 2 23257 9
## Warning: Removed 2259 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 23257 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## [1] "CBO_day_9_OTX1"
##
## Down No Up
## 9 22886 28
## Warning: Removed 702 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22886 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 20 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
##
## Down No Up
## 13 22883 27
## Warning: Removed 840 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22883 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 14 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
##
## Down No Up
## 13 22879 31
## Warning: Removed 747 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22879 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## [1] "CBO_day_9_RFX4"
##
## Down No Up
## 8 22886 29
## Warning: Removed 813 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22886 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
##
## Down No Up
## 15 22867 41
## Warning: Removed 732 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22867 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 22 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
##
## Down No Up
## 9 22890 24
## Warning: Removed 838 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 22890 rows containing missing values or values outside the scale range
## (`geom_text_repel()`).
## Warning: ggrepel: 9 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
gc()## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 7855839 419.6 12594381 672.7 NA 12594381 672.7
## Vcells 22783465 173.9 57594827 439.5 65536 47929023 365.7
sessionInfo()## R version 4.2.3 (2023-03-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] readxl_1.4.3 UpSetR_1.4.0
## [3] ComplexHeatmap_2.14.0 data.table_1.15.4
## [5] dplyr_1.1.4 ggpointdensity_0.1.0
## [7] viridis_0.6.5 viridisLite_0.4.2
## [9] DESeq2_1.38.3 SummarizedExperiment_1.28.0
## [11] Biobase_2.58.0 MatrixGenerics_1.10.0
## [13] matrixStats_1.3.0 GenomicRanges_1.50.2
## [15] GenomeInfoDb_1.34.9 IRanges_2.32.0
## [17] S4Vectors_0.36.2 BiocGenerics_0.44.0
## [19] RColorBrewer_1.1-3 MASS_7.3-60
## [21] stringr_1.5.1 ggpubr_0.6.0
## [23] ggrepel_0.9.5 ggplot2_3.5.1
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-8 bit64_4.0.5 doParallel_1.0.17
## [4] httr_1.4.7 tools_4.2.3 backports_1.5.0
## [7] bslib_0.8.0 utf8_1.2.4 R6_2.5.1
## [10] DBI_1.2.3 colorspace_2.1-1 GetoptLong_1.0.5
## [13] withr_3.0.1 tidyselect_1.2.1 gridExtra_2.3
## [16] bit_4.0.5 compiler_4.2.3 cli_3.6.3
## [19] DelayedArray_0.24.0 labeling_0.4.3 sass_0.4.9
## [22] scales_1.3.0 digest_0.6.37 rmarkdown_2.28
## [25] XVector_0.38.0 pkgconfig_2.0.3 htmltools_0.5.8.1
## [28] highr_0.11 fastmap_1.2.0 GlobalOptions_0.1.2
## [31] rlang_1.1.4 rstudioapi_0.16.0 RSQLite_2.3.7
## [34] farver_2.1.2 shape_1.4.6.1 jquerylib_0.1.4
## [37] generics_0.1.3 jsonlite_1.8.8 BiocParallel_1.32.6
## [40] car_3.1-2 RCurl_1.98-1.16 magrittr_2.0.3
## [43] GenomeInfoDbData_1.2.9 Matrix_1.5-4.1 Rcpp_1.0.13
## [46] munsell_0.5.1 fansi_1.0.6 abind_1.4-5
## [49] lifecycle_1.0.4 stringi_1.8.4 yaml_2.3.10
## [52] carData_3.0-5 zlibbioc_1.44.0 plyr_1.8.9
## [55] blob_1.2.4 parallel_4.2.3 crayon_1.5.3
## [58] lattice_0.22-6 cowplot_1.1.3 Biostrings_2.66.0
## [61] annotate_1.76.0 circlize_0.4.16 KEGGREST_1.38.0
## [64] locfit_1.5-9.10 knitr_1.48 pillar_1.9.0
## [67] rjson_0.2.21 ggsignif_0.6.4 geneplotter_1.76.0
## [70] codetools_0.2-20 XML_3.99-0.17 glue_1.7.0
## [73] evaluate_0.24.0 foreach_1.5.2 vctrs_0.6.5
## [76] png_0.1-8 cellranger_1.1.0 gtable_0.3.5
## [79] purrr_1.0.2 tidyr_1.3.1 clue_0.3-65
## [82] cachem_1.1.0 xfun_0.47 xtable_1.8-4
## [85] broom_1.0.6 rstatix_0.7.2 tibble_3.2.1
## [88] iterators_1.0.14 AnnotationDbi_1.60.2 memoise_2.0.1
## [91] cluster_2.1.6